{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv('../data/train_xy_all.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test = pd.read_csv('../data/test_all.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_xy = pd.read_csv('../data/train_xy.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train_x = pd.read_csv('../data/train_x.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25000, 157)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_xy_d = train_xy.drop(['y','cust_id','cust_group'],axis=1)\n",
    "train_x_d = train_x.drop(['cust_id','cust_group'],axis=1)\n",
    "\n",
    "xxx = pd.concat([train_xy_d,train_x_d])\n",
    "xxx.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x_3</th>\n",
       "      <th>x_4</th>\n",
       "      <th>x_5</th>\n",
       "      <th>x_6</th>\n",
       "      <th>x_7</th>\n",
       "      <th>x_8</th>\n",
       "      <th>x_9</th>\n",
       "      <th>x_10</th>\n",
       "      <th>x_11</th>\n",
       "      <th>x_12</th>\n",
       "      <th>...</th>\n",
       "      <th>x_142</th>\n",
       "      <th>x_143</th>\n",
       "      <th>x_144</th>\n",
       "      <th>x_149</th>\n",
       "      <th>x_150</th>\n",
       "      <th>x_153</th>\n",
       "      <th>x_154</th>\n",
       "      <th>x_155</th>\n",
       "      <th>x_157</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-99</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 104 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   x_3  x_4  x_5  x_6  x_7  x_8  x_9  x_10  x_11  x_12 ...  x_142  x_143  \\\n",
       "0  -99  -99  -99  -99  -99  -99  -99   -99   -99   -99 ...      1      1   \n",
       "1  -99  -99  -99  -99  -99  -99  -99   -99   -99   -99 ...      1      1   \n",
       "2    0    0    0    0    0    0    0     0     0     0 ...      1      2   \n",
       "3    0    0    0    0    1    1    0     0     0     0 ...      1      2   \n",
       "4  -99  -99  -99  -99  -99  -99    0     1     1     0 ...      2      1   \n",
       "\n",
       "   x_144  x_149  x_150  x_153  x_154  x_155  x_157  y  \n",
       "0      1      1      1      1      1      1    -99  0  \n",
       "1      2      1      1      1      1      1      2  0  \n",
       "2      2      1      2      1      1      1      2  0  \n",
       "3      2      1      1      1      1      1      4  0  \n",
       "4      1      1      1      1      1      1      1  0  \n",
       "\n",
       "[5 rows x 104 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25000, 104)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x_train = train.drop(['y'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 103)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# iv_feature = ['x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9', 'x_10', 'x_11', 'x_12', 'x_13', 'x_14', 'x_15', 'x_16', 'x_17', 'x_18', 'x_19', 'x_20', 'x_21', 'x_22', 'x_23', 'x_24', 'x_25', 'x_26', 'x_27', 'x_28', 'x_29', 'x_30', 'x_31', 'x_32', 'x_33', 'x_34', 'x_35', 'x_36', 'x_37', 'x_38', 'x_39', 'x_40', 'x_41', 'x_42', 'x_43', 'x_44', 'x_45', 'x_46', 'x_47', 'x_48', 'x_50', 'x_51', 'x_52', 'x_53', 'x_54', 'x_55', 'x_56', 'x_57', 'x_58', 'x_59', 'x_60', 'x_61', 'x_62', 'x_63', 'x_64', 'x_65', 'x_66', 'x_67', 'x_68', 'x_69', 'x_70', 'x_71', 'x_72', 'x_73', 'x_74', 'x_75', 'x_76', 'x_77', 'x_78', 'x_79', 'x_80', 'x_81', 'x_82', 'x_83', 'x_84', 'x_85', 'x_86', 'x_87', 'x_88', 'x_90', 'x_97', 'x_98', 'x_99', 'x_100', 'x_101', 'x_139', 'x_140', 'x_141', 'x_142', 'x_143', 'x_144', 'x_149', 'x_150', 'x_153', 'x_154', 'x_155', 'x_157']\n",
    "# x_test = test[iv_feature]\n",
    "# x_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000, 157)"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_test = test.drop(['cust_id','cust_group'],axis=1)\n",
    "x_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x_1</th>\n",
       "      <th>x_2</th>\n",
       "      <th>x_3</th>\n",
       "      <th>x_4</th>\n",
       "      <th>x_5</th>\n",
       "      <th>x_6</th>\n",
       "      <th>x_7</th>\n",
       "      <th>x_8</th>\n",
       "      <th>x_9</th>\n",
       "      <th>x_10</th>\n",
       "      <th>...</th>\n",
       "      <th>x_148</th>\n",
       "      <th>x_149</th>\n",
       "      <th>x_150</th>\n",
       "      <th>x_151</th>\n",
       "      <th>x_152</th>\n",
       "      <th>x_153</th>\n",
       "      <th>x_154</th>\n",
       "      <th>x_155</th>\n",
       "      <th>x_156</th>\n",
       "      <th>x_157</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.291667</td>\n",
       "      <td>0.555388</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.270833</td>\n",
       "      <td>0.770302</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.354167</td>\n",
       "      <td>0.440327</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.208333</td>\n",
       "      <td>0.476509</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.955286</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 157 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        x_1       x_2  x_3  x_4  x_5  x_6  x_7  x_8  x_9  x_10  ...    x_148  \\\n",
       "0  0.291667  0.555388  -99  -99  -99  -99  -99  -99  -99   -99  ...        1   \n",
       "1  0.270833  0.770302    0    0    0    0    1    1    0     0  ...        1   \n",
       "2  0.354167  0.440327    0    0    0    0    4    3    0     0  ...        1   \n",
       "3  0.208333  0.476509    0    0    0    0    1    1    0     0  ...        1   \n",
       "4  0.125000  0.955286    0    0    0    0    2    1    0     0  ...        1   \n",
       "\n",
       "   x_149  x_150  x_151  x_152  x_153  x_154  x_155  x_156  x_157  \n",
       "0      1      1      1      1      1      2      2      2      3  \n",
       "1      1      1      1      1      1      2      2      1     10  \n",
       "2      1      1      1      1      1      1      1      3      3  \n",
       "3      1      1      1      1      1      1      1      1      4  \n",
       "4      1      1      1      1      1      1      1      1     10  \n",
       "\n",
       "[5 rows x 157 columns]"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(35000, 157)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = pd.concat([xxx,x_test])\n",
    "x.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1994d1ae9b0>"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPkAAADuCAYAAAD7nKGzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAGSJJREFUeJzt3Xl8FPX9x/HXJLsJBAJiADlERwWl\nigeiXB6UYlUcb+lhi+Wn1vu29deprW1qtU49q1W0rVhr258tYus1WmzVeuEBCigeeMAolwJRchCS\nvb6/P2ZFjlyb7OY7M/t5Ph77iODu5h2Sd+b4zny/hlIKIUR0legOIIQoLCm5EBEnJRci4qTkQkSc\nlFyIiJOSCxFxUnIhIk5KLkTEScmFiDgpuRARJyUXIuKk5EJEnJRciIiTkgsRcVJyISJOSi5ExEnJ\nhYg4KbkQESclFyLipORCRJyUXIiIk5ILEXFSciEiTkouRMRJyYWIOCm5EBEnJRci4qTkQkSclFyI\niJOSCxFxMd0BROGZtlsCDAR2BCq2ePTc4r9LgY3ZR0P28cV/13mOta77k4t8MGR98mgwbbcc2B/Y\nE9gDGA6YwDBgCBDv4qfYBHjAsuxjefbjh8B7nmMluvj+okCk5CFk2q4BjATGAuOyH/ej60XurATw\nJrAAeBWY5znWu5qyiG1IyUPCtN09geOAo/FL3UdvonatB+YB/wEe9hzrY815ipaUPKBM2y0FDsEv\n9vH4u+FhthB4GL/wi3SHKSZS8gDJ7oZPAWYAx+CfKIsiD3gEuN9zrJc1Z4k8KXkAmLY7CDgdOBP/\npFkxWQz8DviL51j1usNEkZRck+yw1pHA2fi75MU+nNkA3A/c5TnW67rDRImUvJtlh7rOAK4AdtMc\nJ6gWANcDczzHkh/QLpKSdxPTdiuAc4Af4o9bi/a9AVQDD0nZO09KXmCm7Zbhl/tKYJDmOGG1EKj2\nHOsR3UHCSEpeQKbtTgeuBXbRnSUiFgA/9Rxrru4gYSIlLwDTdvcGZgKTdGeJqEeASzzH8nQHCQMp\neR6ZttsL+BlwGfouMS0Wm4DrgF/LdfNtk5LniWm7JwG/QXbNu9vbwFmeY83THSSopORdZNpuFXA3\ncKLuLEVMAXcCV3iO1ag7TNBIybvAtN1JwF+BobqzCMDfqn/Tc6y3dAcJEil5J2RvHvk58BNkdp2g\n2QRc5DnWLN1BgkJKniPTdofhb70P051FtOmvwLmeYzXoDqKblDwHpu1awH1E9+6wqHkP+Fax39oq\nu5odZNruRfjjs1Lw8NgTmGfablGfFJUteTuyd4vdiD/2LcIpg3+cPlN3EB2k5G0wbbcn8BfgZN1Z\nRF5cD9jFdrOLlLwVpu0OwN89H687i8ir/wNOL6ar5KTkLTBt18SfgLDYZmkpFs8AJ3mOVas7SHeQ\nkm/DtN1dgWeBXXVnEQX1MvD1Yhhik7PrW8gW/L9IwYvBeODR7HmXSJOSZ5m2Oxh4Cn/VEVEcvgo8\nmJ3YI7Kk5Gy+yeTfyDF4MZoK3J+9VDmSir7kpu32Bp4A9tGdRWhzMnBv9pqIyInkF9VR2cUM/gwc\nrDuL0G46cJPuEIVQ1CUHfoHcBy6+dKlpu9/XHSLfinYIzbTdacBswNCdRQRKEjjCc6zndAfJl6Is\nuWm7+wMvAr10ZxGBtA440HOslbqD5EPR7a5nL1d9GCm4aN0AYE5UhtaKquTZs6ezkYtdRPvGAbfq\nDpEPRVVy4HL8CyCE6Ihzo3AvetEck5u2uw/wGlCuO4sIlbXAPp5jrdcdpLOKYktu2m4cfzxcCi5y\nNRC4Q3eIriiKkgNXAaN1hxCh9U3Tdr+pO0RnRX533bTdg4F5QEx3FhFq6/F329fqDpKrSG/Js0Mg\n9yEFF13XH/id7hCdEemSAxcCI3WHEJ
Fxomm7x+sOkavAltwwjKMNw1hqGMYHhmHYub7etN3++Mfi\nQuTT9abthmrPMJAlNwyjFP+M5lRgb+BUwzD2zvFtqoEd8hxNiL2Ac3SHyEUgSw6MBT5QSi1TSiWA\nvwEndPTFpu1+hZB9I0SoVJu221d3iI4KasmHAiu2+PNKcls59CbkZJsonP7AlbpDdFRQS97S7Z8d\nGuszbfco/N18IQrpkuzEn4EX1JKvBIZt8eedgdUdfO01+Y8jxHbKCcnPWiAvhjEMI4a/IuUUYBUw\nH/iOUqrNxeVN2z0SmFv4hEIAkAJ29xxrRbvP1CiQW3KlVAp/jHsu8A4wu72CZ4XmOElEQgy4RHeI\n9gRyS94Zpu2Ow18VQ4juVAcM8xyrTneQ1gRyS95Jl+sOIIpSH+As3SHaEoktuWm7uwDLgMhOkC8C\nbQX+sXlKd5CWRGVLfiFScKHPMCCwt6KGvuTZ5W1O051DFL2LdAdoTehLDhwBDNIdQhS98abt7qk7\nREuiUHLZioug+J7uAC0J9Yk303Z7AZ8ic6iLYPgI2M1zrECVKuxb8pOQgovg2BWYoDvEtsJe8um6\nAwixjW/pDrCt0O6uZ5c7WoMMnYlgWQ3sHKRd9jDfc30keSp43fyHaFj8JBgQH2DS/5hLoTTOhuf/\nTOO7L4BRQuXoY+hz0NbTe6Vq17Lun9eiVAbSaSrHHEvl6GMAaP7kA2rcW1CpBD33OIh+U87GMAw+\n/+8f2bTsNcoG7kb/Y38AQMOSp8k01dPnoA7PiyGCawj+evev6g7yhTCX/Ih8vEmqfj11rz3KkDNn\nUhIvZ91DDhvfeQ6UIl23jiFn3YVhlJDeuGG715b27seg6TdixOJkEptYPesCeg4fR6yyis+evIOq\noy+kbMhI1j5QTdOy1yjf+Ss0r3qHIWfczrpHbyCxziO2w2A2LvkPA79xdT6+HBEMUwhQycN8TJ6X\nkgOQSaNSCVQmjUo1U9p7R+oXPU7fQ07FMPx/otJe208XZ5TGMWJxAFQ6CdlDn1TDZ2SaN1E+9CsY\nhkHvUV+j8f2XAQOVTqGUQqUSGCWl1L36DyrHHI9RGubft2IbU3QH2FIoS27a7kj8iSS6LFbZnz5j\nT2LVnaez8vbTMMor6LnbgaQ+/4TGd55nzZ8u5dPZPyf52aoWX5+qW8fqey5k1czT6Tv+FGKVVaTr\na4hVVm1+TmllFemGGkrKK6jYayJr7r2YWN+dMMp7kVjzHhUjxufjSxHBMdG03cAsyRXWzUfetuLp\npgYa33+FoefOoqS8F+sedmh46xlUOokRizN4xm9oXDqPmiduZdB3r9/u9bE+Axhyxu2k6mtY989r\nqNjrENqaqarvuGn0HTcNgJonbmOHw6ZTv3guTcsXEh9ossPEb+frSxP69AQmAs/oDgIh3ZKTx5I3\neYuI9d2J0oq+GKUxKvacQPOqdyit7J8tLPTccwKJtV6b7xOrrCJetStNK96itLI/qfqazf8vXV9D\nae+qrZ6f+PRD/3X9hrJxydMMONEmue6jVvcYROh8TXeAL4Su5KbtlpDHNcZjfQaQWL2UTLIJpRRN\nHy0mXjWMihHjafpoMQDNK94kvuP2k8Wm6taTSTYD/h5B86q3iVftTKz3jpSU9aR51bsopWhY8jQV\nI8Zt9doNz/+Fvod+FzIpUBn/L40SVKo5X1+a0Cswx+Vh3F0fAeRtzuvyIXtRsdchrLn3UoySEsp2\n2oPK/Y9GpZpZ/+iN1M1/GKOsB1VT/ZuMmte8T8OiJ6iaejHJmhV8/sysze/VZ+zJlA0wAdjxyPOp\neTw7hLb7GHrsftDm5zW+9xJlg0ZsPm4vHzKS1bMuID7QpGzg7vn60oReB5u2W+45lvbf2qG7GMa0\n3W8As3XnEKIDDvAca7HuEKHbXQf21R1AiA4apTsAhLPk++kOIEQH7aM7AEjJhSgk2ZLnyrTd3oCp\nO4
cQHSQl74RRtLxOmhBBZGYnNtEqbCUfrjuAEDkwgJG6Q4St5LksXyxEEAzWHSBsJc/LTSlCdKOB\nugOEreRDdAcQIkdS8hxp/wcTIkcDdAcIW8n76w4gRI60b5ik5EIUlpQ8R310BxAiR1LyHMn0yyJs\nKnQHCFvJw5ZXCO1zNrRbGsMwLjQMo193hOkAuaRVhI32vc+O/JYZBMw3DON14B5grtIw00R22idR\nIBeX/uOFPsbGjO4cUZOidANYWjO0W3Kl1E8Nw7gKf8WS04HbDcOYDcxSSn1Y6IBbkJIX0Gqqyi4r\nnXOQYci/c559pDtAh76h2S33J9lHCugHzDEMY/s5igtHdtULaE560th700e9oDtHBKV0B+jIMfnF\nhmG8BlwPvAjsq5Q6DxgDnFLgfKIb/SI14/BXMiOf050jYpK6A3RkS94fOFkpdZRS6gGlVBJAKZUB\nji1oui14jpUENnXX5ytW30789NBVqiow63hFQPC35EqpnymlWjyuUEq9k/9Ibapp/ymiKxQlJV9v\nvmGfjaq8u7+3UbVRd4CwnWRZrztAMWikR68jmm/sl1Ila3RniQDtS+JIyUWL1lA16JREdb1S1OvO\nEnJS8hxJybvRYjV8z8uS5y9VirTuLCG2UncAKblo00OZQw/6Q/qYF3XnCDEpeY6k5Br8KjX98Hnp\nvZ/VnSOkpOQ50n71ULH6bvLKw1aq/q/ozhFCckyeIxnW0SQ7tLbvRtVDvge5kS15jt7WHaCYbaK8\nYkrzjTumVIn2rVNIrKe6VvvSxaEquedY9QTgN2Mx+4Qddzo58YtGpajTnSUEPtYdAEJW8izZmmv2\nhtpjxEXJi95XSv8lmwH3mu4AICUXnfRYZsKYmenjX9KdI+Be1h0ApOSiC25IffuwZ9P7ydBa66Tk\nnbRIdwDxpRnJHx3+UWZgp3+Yz3h4EwNvqGfUzIbNf3fV003sd2cDB9zVwJF/3sjq+u0nrHlmeYoD\n7mrY/OhxTR0Pvevf1bn88wzj7m5gxG8b+NacRhJpfyKj376SYNTMBo7565d/98LHKS6f29TZ+G2p\nJSCjQWEs+UICcGeP+IJhHJX49f71qudbnXn1/xwQ51/Tt57Q9IpDynnjvN4sOrc3x+4Z4+pntz9B\nPXm3GIvO9Z/z9IxeVMThyD38iY5+9J8mLhtfzvsX9aZfD4NZr/vlv3thgjfO68XoQSXM/SCFUopf\nPtfMVYeXdyZ6e16lurbbp0lrSehK7jlWioDsBglfE+U9pzTfODCpSnMe+Th81xg79tx60p8+5V/+\neWOi/SmB5rydZOqIGBVxA6UUTy9PM21vv/Az9o/z0NIv521IpqExCfFSgz+/keSY4TH69SzIpEOB\n+RkNXcmzntcdQGxtLf0GnJD4ZbNS1Obj/X7yVBPDbqnnr28muXpy21vavy1JcuqoOAA1mxQ79IBY\niV/cnfuUsKrO36D+cEIZ42dtZF2j4pBhpfxpcZLzDy7LR9yWSMm76BndAcT23lbmHhckL/kwH0Nr\n107pwYrLKvnuvnFufzXR6vPW1Gd4c22Go7K76i3NI2xkN9Sn7V/GwnN685eTe3LzSwkuHlvGEx+k\nmDa7kcv+1UQmv5MQS8m76CXkuDyQHs+MO/D29Il5G1r7zr5xHnyn9d8Zs99KctLIGPFSv8n9Kww2\nNEEq4xd2ZV2GIZVb746vrs8wf3WaE0bGuea5Zv4+rSflMXhqWd7uqF1Kde1n+XqzrgplybPzvcnQ\nTUDdlPrmYU+nD+j09+f9mi/L9sjSFCP7t/5jev+S1OZddQDDMJi8Wylz3vZ/MfxpcZIT9opv9Zqr\nnm7ml9lDgE0pf0tfYkBjMm9b8kfy9Ub5EMqSZ/1LdwDRujOSVxy+PLNTu1v0Ux9sZMKsjSytybDz\nzfXMej2B/VQzo2Y2sN+dDTy5LMWtR/cAYMHqNN9/5Mu5PL0NGVbU
ZZhkbr1Iya+P6MHNLzUz/LZ6\najYpzhz9ZckXrvF/gYwe7L/mzNFx9r1zI6+vSXP08LytaDQ7X2+UD4aGxVDywrTdwfjXsYf5F1Wk\nlZNoml9+3gd9jE2jdGfpRsuprt1dd4gthbYgnmOtAWSO8ABrpqzH5OabByVV6QrdWbrRA7oDbCu0\nJc+6X3cA0bYa+vY/LnFtMl9DayEgJc+zBwnAChWibe+qXXY/J3nZMqUi/71aRnXtAt0hthXqknuO\nVQP8R3cO0b4nMwePviU1LerTRwVuKw4hL3mW7LKHxG3pkw+dmx4T5aFPKXmBPASyAEBYnJO8/PAP\nM4Pn6c5RAO9TXRuISSK2FfqSZ6eE+qPuHKKjDGNqwhlTqyre0J0kz+7QHaA1oS951q3A9jcdi0BK\nEC+f3HzT0KQqjcoU23XAPbpDtCYSJfccaxkBu5RQtO0z+lZZiV9lMorPdWfJgz9SXRvYQ8ZIlDzr\nZt0BRG7eU8N2Oyv5g4+UovXbzIIvA9zW3pMMw7jHMIy1hmEs6YZMW4lMyT3Hep6AzI4pOu6pzJgD\nbkh9a77uHF0wh+raZR143r3A0QXO0qLIlDzrFt0BRO5mpk845PH02DAOrSng2g49UannAC23n0at\n5H8H3tcdQuTu/OSlk97LDA3b6qmPUV0b+FGCSJU8O//bj3XnEJ1jJa47eIPqtVh3jhxcoztAR0Sq\n5ACeYz2IP3OMCJkksbKvNt+8S0LFluvO0gEPUV37qu4QHRG5kmddoTuA6JwNVPabmriuJKOMwEyf\n1IJNwGW6Q3RUJEvuOdaLwD915xCd86EauusZyStWKoX2FUFb4VBd6+XyAsMw7sffw9zLMIyVhmGc\nWZBkLYhkybNskAX5wuq/mQP2uy71ncDdtgksA67P9UVKqVOVUoOVUnGl1M5KqVkFyNaiyJbcc6z3\ngLt05xCd9/v0sYc8kp4QtKG1S6iuLci6SoUS2ZJn/QRYpTuE6LyLkxdNejcz7AXdObIeo7r2Md0h\nchXpknuOVQecrzuH6JrjEteO/UxV6l7osgm4RHOGTol0yQE8x3qEgE2RK3KTJFY2ufkms1nv0NoN\nHbx8NXAiX/KsC4BPdYcQnVdL7x2OTjilGWXUaPj0bwK/0vB586IoSu451nrgLN05RNcsV0N2mZH8\n0Wql6M4TXw3AN8J2sm1LRVFyAM+xHgW6bdhCFMbzmf32/WXqtNeVortWBTmb6tql3fS5CqJoSp51\nEaD7BI7oonvSUyf+M3Nodyys8Tuqa0M/UWhRldxzrE3ASYCO4zqRR5cnz5+0JGMWcp36RcClBXz/\nbhPatdC6wrTdrwNPAKXtPVcEV4xU8pXyC5ZUGfWj8/zWdcAYqms/yPP7alFUW/IveI71b/wLZUSI\npYjFJzfftHuzin+Y57f+flQKDkVacgDPsX4NzNGdQ3RNHb37HpVwyjLKWJent7yN6tpALpLQWUVb\n8qzTgTBNUiBa4KnBw6Ynf7xWKTa1/+w2/Y0Q3ULaUUVdcs+xGoAjgfd0ZxFdMy8zap+fp2Ys6sLQ\nmgt8j+rayM3fX9QlB/Acay1wBPCx7iyia+5LHzXhgfSkzgytPQtMo7o2kquuFuXZ9ZaYtjsCeB7Y\nSXcW0TUPl/30+f1Llh3WwacvAL4W5MURukpKvgXTdvcD/gv00xxFdEEp6dTL5RcuHmDUjmnnqW8D\nh1NdG+nrJqTk2zBtdzzwb6C37iyi83rTWLeg/Ly1PYzk8Faeshw4lOra1d2ZS4eiPybfludYLwOT\ngbW6s4jOa6Ciz5GJ63ukldHS93E5cEQxFByk5C3yHGsBMBHI90UWoht9rHba+TuJn6xXisYt/noh\nMDGs94Z3hpS8FZ5jfYhfdFlfLcReUXvvfWXqzDeVIoN/GDaJ6tpPdOfqTlLyNmSH174KzNUcRXTB\n/ekp4+5KH3c9YEX5LHpr5MRb
B5i2Gwf+AMzQnUXkLANcmb2MuShJyXNg2u7FwI1AXHcW0SENwHTP\nsR7WHUQnKXmOTNudCDwADNGdRbTpdeDU7Pz7RU2OyXPkOdY84AD8+9FF8CjgJmCCFNwnW/JOMm3X\nAC4HrkN234PiE2CG51hP6g4SJFLyLjJtdzRwN3Cg7ixF7nHg9OyIiNiC7K53kedYC4Gx+Fv1jZrj\nFKMa4BzgWCl4y2RLnkem7e4C3AEcqztLEcjg70Fd6TlWpG8w6SopeQGYtnsKcBtyBr5Q5gMXeI41\nX3eQMJDd9QLwHOtB4CvA1UDRXWFVQF/smo+XgnecbMkLzLTdKuB/gQuBCs1xwuoz4DfAbz3H2qA7\nTNhIybuJabuDgCuBs4FyzXHCYj3+mPcdnmPJHlEnScm7mWm7w4AfAd8DKjXHCapP8S8fvtNzLBmx\n6CIpuSam7fYGTgPOA/bVHCcoFgF3Afdll7QSeSAlDwDTdg8FzgdOAco0x+luDcBs4PeeY72iO0wU\nSckDxLTdgcB04BvAOMDQm6hgMvgTZt4L/EN2yQtLSh5Q2WP3acDxwKFATG+iLqvFn3zDBZ7wHCtf\nyxqJdkjJQ8C03X7AVOAo/CmpWpuBNGjewS/1Y8CLnmOlNOcpSlLyEDJtdwAwAb/wE4GDgJ5aQ/mz\n2y7IPuYDCzzHKqq51IJKSh4B2empRuFv4YcDe2zxcSj5O7ZvBlbgLyn1xWMxMN9zrBV5+hwiz6Tk\nEWfabg/ABKrwx+X7ZB+VW3yMA0kglf3YiH/WuwHYAKzEL/Raz7HkByZkpORCRJzcoCJExEnJhYg4\nKbkQESclFyLipORCRJyUXIiIk5ILEXFSciEiTkouRMRJyYWIOCm5EBEnJRci4qTkQkSclFyIiJOS\nCxFxUnIhIk5KLkTEScmFiDgpuRARJyUXIuKk5EJEnJRciIiTkgsRcVJyISJOSi5ExEnJhYg4KbkQ\nESclFyLipORCRJyUXIiIk5ILEXH/D7h70KJr/+S3AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "train['y'].value_counts().plot.pie(autopct = '%1.2f%%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_train = train['y']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(25000,)"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for i in range(96,158):\n",
    "    col = 'x'+'_'+str(i)\n",
    "    if col in x.columns.values:\n",
    "        dummies_df = pd.get_dummies(x[col]).rename(columns=lambda x: col + str(x))\n",
    "        x = pd.concat([x, dummies_df], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x_1</th>\n",
       "      <th>x_2</th>\n",
       "      <th>x_3</th>\n",
       "      <th>x_4</th>\n",
       "      <th>x_5</th>\n",
       "      <th>x_6</th>\n",
       "      <th>x_7</th>\n",
       "      <th>x_8</th>\n",
       "      <th>x_9</th>\n",
       "      <th>x_10</th>\n",
       "      <th>...</th>\n",
       "      <th>x_1561</th>\n",
       "      <th>x_1562</th>\n",
       "      <th>x_1563</th>\n",
       "      <th>x_157-99</th>\n",
       "      <th>x_1571</th>\n",
       "      <th>x_1572</th>\n",
       "      <th>x_1573</th>\n",
       "      <th>x_1574</th>\n",
       "      <th>x_15710</th>\n",
       "      <th>x_15711</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.354167</td>\n",
       "      <td>0.604988</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.012058</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.565979</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.208333</td>\n",
       "      <td>0.316209</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.208333</td>\n",
       "      <td>0.008061</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>-99</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 364 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        x_1       x_2  x_3  x_4  x_5  x_6  x_7  x_8  x_9  x_10   ...     \\\n",
       "0  0.354167  0.604988  -99  -99  -99  -99  -99  -99  -99   -99   ...      \n",
       "1  0.125000  0.012058  -99  -99  -99  -99  -99  -99  -99   -99   ...      \n",
       "2  0.333333  0.565979    0    0    0    0    0    0    0     0   ...      \n",
       "3  0.208333  0.316209    0    0    0    0    1    1    0     0   ...      \n",
       "4  0.208333  0.008061  -99  -99  -99  -99  -99  -99    0     1   ...      \n",
       "\n",
       "   x_1561  x_1562  x_1563  x_157-99  x_1571  x_1572  x_1573  x_1574  x_15710  \\\n",
       "0       0       0       1         1       0       0       0       0        0   \n",
       "1       0       1       0         0       0       1       0       0        0   \n",
       "2       0       1       0         0       0       1       0       0        0   \n",
       "3       0       1       0         0       0       0       0       1        0   \n",
       "4       0       1       0         0       1       0       0       0        0   \n",
       "\n",
       "   x_15711  \n",
       "0        0  \n",
       "1        0  \n",
       "2        0  \n",
       "3        0  \n",
       "4        0  \n",
       "\n",
       "[5 rows x 364 columns]"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(25000, 364)\n",
      "(10000, 364)\n"
     ]
    }
   ],
   "source": [
    "# Positional row split of x: rows [0, 25000) go to train_X, rows [25000, 35000) to test_X.\n",
    "# NOTE(review): presumably x stacks the training rows above the test rows;\n",
    "# confirm against the cell that builds x before changing these boundaries.\n",
    "train_X = x.iloc[:25000]\n",
    "test_X = x.iloc[25000:35000]\n",
    "\n",
    "# One shape per line, train first, as a quick sanity check on the split.\n",
    "print(train_X.shape, test_X.shape, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn import metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "from xgboost import XGBClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Hold out 20% of the training rows for validation; the fixed random_state keeps the split repeatable.\n",
    "# NOTE(review): y_train is both the label input and one of the outputs, so after this\n",
    "# cell runs it holds only the training part of the labels. Re-running the cell without\n",
    "# first re-creating y_train passes the already-split labels back in.\n",
    "X_train, X_val, y_train, y_val = train_test_split(\n",
    "    train_X,\n",
    "    y_train,\n",
    "    test_size=0.2,\n",
    "    random_state=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Build the XGBoost classifier and fit it on the training split only;\n",
    "# X_val / y_val are left untouched for the evaluation cell.\n",
    "gbm = XGBClassifier(\n",
    "    n_estimators=150,\n",
    "    max_depth=6,\n",
    "    min_child_weight=2,\n",
    "    gamma=0.9,\n",
    "    subsample=0.8,\n",
    "    colsample_bytree=0.8,\n",
    "    objective='binary:logistic',\n",
    "    nthread=-1,\n",
    "    scale_pos_weight=1,\n",
    ").fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9385598608876429\n"
     ]
    }
   ],
   "source": [
    "# Score the held-out split: column 1 of predict_proba is used as the ranking score.\n",
    "predictions = gbm.predict_proba(X_val)\n",
    "pre = predictions[:, 1]\n",
    "\n",
    "# AUC on the validation set\n",
    "val_auc = metrics.roc_auc_score(y_true=y_val, y_score=pre)\n",
    "print(val_auc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(10000,)"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict on the test rows and keep only column 1 of the probability matrix\n",
    "# (presumably the positive class; confirm against gbm.classes_).\n",
    "preds = gbm.predict_proba(test_X)\n",
    "pred = preds[:, 1]\n",
    "\n",
    "# Displayed as the cell output: one score per test row is expected.\n",
    "pred.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Pair each test cust_id with its predicted score and write the submission CSV\n",
    "# (no index column in the file).\n",
    "submission_columns = {\n",
    "    'cust_id': test['cust_id'],\n",
    "    'pred_prob': pred,\n",
    "}\n",
    "Submission = pd.DataFrame(submission_columns, columns=['cust_id', 'pred_prob'])\n",
    "Submission.to_csv('../result/xgb_sub1.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
